% process_mRNA_microarray.m
% Chris Gowen
% April, 2012
% 
% This script is provided to accompany the chapter "Linking RNA 
% measurements and proteomics with genome-scale models" in Methods in
% Molecular Biology: Systems Metabolic Engineering, ed. Hal Alper
%
% It is provided for educational purposes only, without any warranty.
%
%
% The scripts in the supplementary data are intended to be run in the
% following order:
%   - process_mRNA_microarray.m
%   - Akesson_method.m
%   - Shlomi_method.m
%
% To do this, the user will need to have the additional functions
% shlomiAltOpt.m and mapGeneCalls.m, the data file log2_expression.mat, and
% the model file iMM904.mat available in the Matlab path. The user will
% also need to have COBRA toolbox and the IBM ILOG CPLEX linear programming
% solver installed and available. See the chapter for more details.


% Start from an empty workspace, then read every variable stored in the
% data file. The rest of this script uses two of them: log2 and sample_IDs.
% Note that the loaded variable log2 hides MATLAB's built-in log2 function
% for as long as it stays in the workspace.
clear
load('log2_expression.mat')

% Preview the log2-transformed expression data for two of the samples
% (columns 3 and 14), one 40-bin histogram per sample, stacked vertically
% in a single figure window.
figure()

% Upper panel: sample in column 3
subplot(2,1,1)
hist(log2(:,3),40)
xlabel('log2 expression level');
ylabel('frequency');
title(sample_IDs(3));

% Lower panel: sample in column 14
subplot(2,1,2)
hist(log2(:,14),40)
xlabel('log2 expression level');
ylabel('frequency');
title(sample_IDs(14));

% For the sake of example, a transcript is called "high" when its log2
% expression lies above the 90th percentile of its sample and "low" when
% it lies below the 10th percentile. prctile returns one column of cutoff
% values per sample: row 1 holds the 10th percentile, row 2 the 90th.
% The dimension argument (1) is given explicitly so the percentiles are
% always taken down each column, even if the matrix has only one row.
percentiles = prctile(log2,[10 90],1);

% Create containers for high and low transcripts. Both are allocated as
% 1-by-N cell arrays (N = number of samples) so that they always have the
% same orientation, whatever the shape of sample_IDs.
high_transcr = cell(1,length(sample_IDs));
low_transcr = cell(size(high_transcr));

for i=1:length(sample_IDs)
    % Row indices of the transcripts in sample i that fall beyond each
    % cutoff. The comparisons are strict, so a value exactly equal to a
    % cutoff is placed in neither set.
    high_transcr{i} = find(log2(:,i)>percentiles(2,i));
    low_transcr{i} = find(log2(:,i)<percentiles(1,i));
end
